%R code for implementing HERRA in GECCO data %\renewcommand{\baselinestretch}{1.0} %\setlength{\tabcolsep}{1.0mm} \scriptsize \begin{verbatim} \begin{lstlisting}[language=R] library("glmnet") fun.confounders<-function(x,y, pf, a,b,n,lam){ model<-glmnet(x[a:b,],y[a:b],type.gaussian="naive",lambda=lam,penalty.factor=pf) coef<-predict(model,type="coefficients") n.conf = sum(pf==0) ### num of confounders nonzerocoef<-c(seq(1:(n.conf)), which(coef[-(1:(n.conf+1))]!= 0)+n.conf) nh = round(n/2) a2<-ifelse(a==1,nh+1,1) b2<-ifelse(b==nh,n,nh) output1<-lm(y[a2:b2]~x[a2:b2,nonzerocoef]) print(length(nonzerocoef)) return(summary(output1)$sigma^2) } #### The dataset includes d:disease status; covariates: study,age,sex; #### geno: genotype matrix from the top SNPs cc = data.matrix(cbind(study,age,sex)) #### Step 1: ITRRS, iterative ridge for dimension reduction lambda.seq = c(0.006, 0.008, 0.01, 0.0102, 0.0104) res = matrix(0,length(lambda.seq),5) for (lambdai in 1:5){ xdat.all=NULL for (i in 1:22){ #ITRRS for each chromosome filename = paste("...",i,".geno",sep="") #data of chromosome i xdat = read.table(filename, header=T) xdat2 = data.matrix(xdat) rm(xdat) for (itrrs in 1:5) { pf<-c(rep(0,ncol(cc)),rep(1,ncol(xdat2))) xdat3<-cbind(cc,xdat2) lam<-lambda.seq[lambdai] if (itrrs>3) { cv = cv.glmnet(xdat3,d,family="gaussian",nfold=10,alpha=0,penalty.factor=pf) lam<-cv$lambda.min } model<-glmnet(xdat3,d,family="gaussian",type.gaussian="naive",lambda=lam,alpha=0,penalty.factor=pf) coef0<-predict(model,type="coefficients") coef = abs(coef0[-(1:(ncol(cc)+1)),]) inout<-ifelse(coef>median(coef),1,0) xdat2<-xdat2[,inout==1] print(c(lam,median(coef), dim(xdat2))) } xdat.all = cbind(xdat.all,xdat2) } filename = paste("...",lambda.seq[lambdai],".Rdata",sep="") save(xdat.all,file=filename) #### END of Step 1 #### Steps 2-4: lasso and OLS xdat3 = cbind(cc, xdat.all) n = nrow(xdat3) nh = round(n/2) nfolds=10 pf<-c(rep(0,ncol(cc)),rep(1,ncol(xdat.all))) cv <- cv.glmnet(xdat3[1:nh,],d[1:nh], nfolds=nfolds,penalty.factor=pf) lam<-cv$lambda.min est.sig.e1ls<-fun.confounders(xdat3,d,pf,1,nh,n,lam) cv <- cv.glmnet(xdat3[(nh+1):n,],d[(nh+1):n],nfolds=nfolds,penalty.factor=pf) lam<-cv$lambda.min est.sig.e2ls<-fun.confounders(xdat3,d,pf,nh+1,n,n,lam) est.sig.els<-(est.sig.e1ls+est.sig.e2ls)/2 vary = summary(lm(d~cc))$sigma^2 h2.ls<-(vary - est.sig.els)/vary K<-0.004 p<-mean(d) z<-dnorm(qnorm(1-K))^2 h2.liab<-h2.ls*(K*(1-K))^2/(p*(1-p))/z h2.est = c(h2.ls, h2.liab) res[lambdai,]=c(lambda.seq[lambdai],h2.est) print("Results:") print(c(lambda.seq[lambdai], h2.est)) } %\end{lstlisting} \end{verbatim} %\clearpage